{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "##  模型构建——C盘为例\n",
    "# 核心部分：构建基于ARIMA或者ARMA的模型，采用AIC/BIC/HQ信息准则对模型进行定阶，确定p,q参数，从而选择最优模型\n",
    "# 根据自相关和偏相关图判定平稳性，确定了所用模型是采用ARMA或者ARIMA，而不是AR或者MA；\n",
    "# 注意：不管是ARMA还是ARIMA模型，都是对平稳数据建模。前者是直接针对平稳数据建模，无需进行差分变换；后者则需要先对数据进行差分，差分平稳后再建模。\n",
    "# 此处讨论了四种组合：ARIMA+AIC、ARIMA+BIC、ARMA+AIC、ARMA+BIC 。注意：基于HQ信息准则进行的模型在3_2buildModel_C_HQ_ARIMA.py中介绍，发现此案例中不适合使用HQ\n",
    "# 步骤：平稳性检验--> 白噪声检验--> 模型识别--> 模型检验\n",
    "import pandas as pd\n",
    "inputfile = 'attrsConstruction.xlsx'\n",
    "\n",
    "data = pd.read_excel(inputfile)\n",
    "df = data.iloc[:len(data)-5] #不使用后五个值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(-0.56351181471562739, 0.87901557028092969, 3L, 38L, {'5%': -2.9412623574865142, '1%': -3.6155091011809297, '10%': -2.6091995013850418}, 859.99762204232331)\n",
      "0.879015570281\n",
      "原始序列经过1阶差分后归于平稳，p值为9.57297559233e-07\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\compat\\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.\n",
      "  from pandas.core import datetools\n",
      "D:\\Anaconda2\\lib\\site-packages\\ipykernel_launcher.py:17: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n"
     ]
    }
   ],
   "source": [
    "# 第 * 1 * 步--C盘---------平稳性检测\n",
    "#1)平稳性检测 ：判断是否平稳，若不平稳，对其进行差分处理直至平稳\n",
    "# 方法：此处采用单位根检验（ADF）的方法或者时序图的方法（见数据探索模块）\n",
    "# 注意：其他平稳性检验方法见steadyCheck.py文件\n",
    "from statsmodels.tsa.stattools import adfuller as ADF\n",
    "diff = 0\n",
    "# 判断D盘数据的平稳性，以及确定几次差分后平稳\n",
    "adf = ADF(df['CWXT_DB:184:C:\\\\'])\n",
    "print adf \n",
    "\n",
    "while adf[1] >= 0.05 : # adf[1]是p值，p值小于0.05认为是平稳的\n",
    "    print adf[1]\n",
    "    diff = diff + 1\n",
    "    adf = ADF(df['CWXT_DB:184:C:\\\\'].diff(diff).dropna())#注意，差分后使用ADF检验时，必须去掉空值\n",
    "    \n",
    "print (u'原始序列经过%s阶差分后归于平稳，p值为%s') % (diff, adf[1])\n",
    "df['CWXT_DB:184:C:\\\\_adf'] = df['CWXT_DB:184:C:\\\\'].diff(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "原始序列为非白噪声序列，对应的p值为：1.06099075081e-08\n",
      "一阶差分序列为白噪声序列，对应的p值为：0.474552255255\n"
     ]
    }
   ],
   "source": [
    "# 第 * 2 * 步--C盘---------白噪声检验\n",
    "# 目的：验证序列中有用信息是否已经被提取完毕，需要进行白噪声检验。若序列是白噪声序列，说明序列中有用信息已经被提取完，只剩随机扰动\n",
    "# 方法：采用LB统计量的方法进行白噪声检验\n",
    "# 若没有通过白噪声检验，则需要进行模型识别，识别其模型属于AR、MA还是ARMA。\n",
    "\n",
    "inputfile2 = 'attrsConstruction.xlsx'\n",
    "data1 = pd.read_excel(inputfile2)\n",
    "data1 = data1.iloc[:len(data1)-5]# 不使用最后五个数据（作为预测参考）\n",
    "\n",
    "# 白噪声检测\n",
    "from statsmodels.stats.diagnostic import acorr_ljungbox\n",
    "\n",
    "[[lb], [p]] = acorr_ljungbox(data1['CWXT_DB:184:C:\\\\'], lags = 1) ## lags是残差延迟个数\n",
    "if p < 0.05:\n",
    "    print (u'原始序列为非白噪声序列，对应的p值为：%s' % p)\n",
    "else:\n",
    "    print (u'原始序列为白噪声序列，对应的p值为：%s' % p)\n",
    "\n",
    "[[lb], [p]] = acorr_ljungbox(data1['CWXT_DB:184:C:\\\\'].diff(1).dropna(), lags = 1)\n",
    "if p < 0.05:\n",
    "    print (u'一阶差分序列为非白噪声序列，对应的p值为：%s' % p)\n",
    "else:\n",
    "    print (u'一阶差分序列为白噪声序列，对应的p值为：%s' % p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1162.97145299\n",
      "1164.16658441\n",
      "1164.13685052\n",
      "1159.85961779\n",
      "1161.79569655\n",
      "1164.5040586\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:628: RuntimeWarning: overflow encountered in exp\n",
      "  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:628: RuntimeWarning: invalid value encountered in divide\n",
      "  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:629: RuntimeWarning: overflow encountered in exp\n",
      "  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:629: RuntimeWarning: invalid value encountered in divide\n",
      "  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:584: RuntimeWarning: overflow encountered in exp\n",
      "  newparams = ((1-np.exp(-params))/\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:585: RuntimeWarning: overflow encountered in exp\n",
      "  (1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:585: RuntimeWarning: invalid value encountered in divide\n",
      "  (1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:586: RuntimeWarning: overflow encountered in exp\n",
      "  tmp = ((1-np.exp(-params))/\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:587: RuntimeWarning: overflow encountered in exp\n",
      "  (1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:587: RuntimeWarning: invalid value encountered in divide\n",
      "  (1+np.exp(-params))).copy()\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:473: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available\n",
      "  'available', HessianInversionWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1162.11685863\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1161.44325304\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1163.00734192\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1165.39225005\n",
      "1162.0256587\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\tsa\\tsatools.py:612: RuntimeWarning: divide by zero encountered in divide\n",
      "  invarcoefs = -np.log((1-params)/(1+params))\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1162.76762974\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1166.5602383\n",
      "1161.90548513\n",
      "1163.14100707\n",
      "1165.11952847\n",
      "1166.82308378\n",
      "1163.0849435\n",
      "1165.0840491\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1164.45767661\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n",
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1164.84692728\n",
      "             0            1            2            3            4\n",
      "0  1162.971453  1164.166584  1164.136851  1159.859618  1161.795697\n",
      "1  1164.504059          NaN  1162.116859  1161.443253  1163.007342\n",
      "2  1165.392250  1162.025659          NaN  1162.767630  1166.560238\n",
      "3  1161.905485  1163.141007  1165.119528  1166.823084          NaN\n",
      "4  1163.084943  1165.084049  1164.457677          NaN  1164.846927\n",
      "0  0    1162.971453\n",
      "   1    1164.166584\n",
      "   2    1164.136851\n",
      "   3    1159.859618\n",
      "   4    1161.795697\n",
      "1  0    1164.504059\n",
      "   2    1162.116859\n",
      "   3    1161.443253\n",
      "   4    1163.007342\n",
      "2  0    1165.392250\n",
      "   1    1162.025659\n",
      "   3    1162.767630\n",
      "   4    1166.560238\n",
      "3  0    1161.905485\n",
      "   1    1163.141007\n",
      "   2    1165.119528\n",
      "   3    1166.823084\n",
      "4  0    1163.084943\n",
      "   1    1165.084049\n",
      "   2    1164.457677\n",
      "   4    1164.846927\n",
      "dtype: float64\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda2\\lib\\site-packages\\statsmodels\\base\\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  \"Check mle_retvals\", ConvergenceWarning)\n"
     ]
    }
   ],
   "source": [
    "# 第 * 3 * 步----------模型识别\n",
    "# 方法：采用极大似然比方法进行模型的参数估计，估计各个参数的值。\n",
    "# 然后针对各个不同模型，方法一：采用BIC信息准则对模型进行定阶，确定p,q参数，从而选择最优模型。\n",
    "# 注意，进行此步时，index需要为时间序列类型\n",
    "# 确定最佳p、d、q的值\n",
    "inputfile3 = 'attrsConstruction.xlsx'\n",
    "data2 = pd.read_excel(inputfile3,index_col='COLLECTTIME')\n",
    "xtest_value=data2['CWXT_DB:184:C:\\\\'][-5:]\n",
    "data2 = data2.iloc[:len(data2)-5]# 不使用最后五个数据（作为预测参考） \n",
    "xdata2 = data2['CWXT_DB:184:C:\\\\']\n",
    "\n",
    "# ARIMA（p,d,q）/ARIMA（p,q）中,AR是自回归,p为自回归项数；MA为滑动平均,q为滑动平均项数,\n",
    "#　注意：ARIMA中的d为使之成为平稳序列所做的差分次数(阶数)，由前一步骤知d=1\n",
    "# from statsmodels.tsa.arima_model import ARMA #建立ARMA（p,q）模型 \n",
    "from statsmodels.tsa.arima_model import ARIMA#建立ARIMA（p,1，q）模型\n",
    "\n",
    "# 定阶\n",
    "# 目前选择模型常用如下准则!!!!!\n",
    "# 增加自由参数的数目提高了拟合的优良性，\n",
    "# AIC/BIC/HQ鼓励数据拟合的优良性但是尽量避免出现过度拟合(Overfitting)的情况。所以优先考虑的模型应是AIC/BIC/HQ值最小的那一个\n",
    "# * AIC=-2 ln(L) + 2 k 中文名字：赤池信息量 akaike information criterion (AIC)\n",
    "# * BIC=-2 ln(L) + ln(n)*k 中文名字：贝叶斯信息量 bayesian information criterion (BIC)\n",
    "# * HQ=-2 ln(L) + ln(ln(n))*k hannan-quinn criterion (HQ)\n",
    "\n",
    "# 　以AIC方式定信息准则　＋　ARIMA为例------------！！！模型检验中也要对应修改！！！------------\n",
    "\n",
    "pmax = int(len(xdata2)/10) # 一般阶数不超过length/10\n",
    "qmax = int(len(xdata2)/10) # 一般阶数不超过length/10\n",
    "\n",
    "aic_matrix = [] # 矩阵\n",
    "for p in range(pmax+1):\n",
    "    tmp = []\n",
    "    for q in range(qmax+1):\n",
    "        try:\n",
    "            print ARIMA(xdata2, (p,1,q)).fit().aic\n",
    "            tmp.append(ARIMA(xdata2, (p,1,q)).fit().aic) #存在部分为空值，会报错\n",
    "#             tmp.append(ARIMA(xdata2, (p,1,q)).fit().bic) #  BIC方式\n",
    "#             tmp.append(ARIMA(xdata2, (p,1,q)).fit().hq) #  AIC方式\n",
    "        except:\n",
    "            tmp.append(None)\n",
    "            \n",
    "    aic_matrix.append(tmp)\n",
    "    \n",
    "aic_matrix = pd.DataFrame(aic_matrix) # 从中可以找出最小值\n",
    "print aic_matrix\n",
    "print aic_matrix.stack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "当前AIC最小的p值与q值分别为：0、3\n",
      "(0, 3)\n",
      "模型ARIMA(0,1,3)符合白噪声检验\n"
     ]
    }
   ],
   "source": [
    "# 第 * 4 * 步--C盘---------模型检验\n",
    "# 确定模型后，需要检验其残差序列是否是白噪声，若不是，说明，残差中还存在有用的信息，需要修改模型或者进一步提取。\n",
    "# 若其残差不是白噪声，重新更换p,q的值，重新确定\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "while 1:\n",
    "    p, q = aic_matrix.stack().idxmin() # 先展平该表格，然后找出最小值的索引位置\n",
    "    print (u'当前AIC最小的p值与q值分别为：%s、%s' % (p,q))\n",
    "    \n",
    "    lagnum = 12 # 残差延迟个数\n",
    "\n",
    "    # 由模型识别可知第一次BIC最小的p值与q值分别为：0、1\n",
    "\n",
    "    arima = ARIMA(xdata2, (p,1,q)).fit() # 建立并训练模型\n",
    "    xdata_pred = arima.predict(typ = 'levels') # 预测\n",
    "    pred_error = (xdata_pred - xdata2).dropna() # 计算残差\n",
    "\n",
    "    # 白噪声检测\n",
    "    from statsmodels.stats.diagnostic import acorr_ljungbox\n",
    "\n",
    "    lbx, px = acorr_ljungbox(pred_error, lags = lagnum)\n",
    "    h = (px < 0.05).sum() # p值小于0.05，认为是非噪声\n",
    "    if h > 0:\n",
    "        print (u'模型ARIMA(%s,1,%s)不符合白噪声检验' % (p,q))\n",
    "        print ('在AIC矩阵中去掉[%s,%s]组合，重新进行计算' % (p,q))\n",
    "        aic_matrix.iloc[p,q] =  np.nan\n",
    "        arimafail = arima\n",
    "        continue\n",
    "    else:\n",
    "        print (p,q)\n",
    "        print (u'模型ARIMA(%s,1,%s)符合白噪声检验' % (p,q))\n",
    "        break\n",
    "        \n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>ARIMA Model Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th> <td>D.CWXT_DB:184:C:\\</td> <th>  No. Observations:  </th>     <td>41</td>    \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>          <td>ARIMA(0, 1, 3)</td>   <th>  Log Likelihood     </th>  <td>-574.930</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>css-mle</td>      <th>  S.D. of innovations</th> <td>287517.428</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>          <td>Tue, 14 Nov 2017</td>  <th>  AIC                </th>  <td>1159.860</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>              <td>21:19:27</td>      <th>  BIC                </th>  <td>1168.427</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Sample:</th>           <td>10-02-2014</td>     <th>  HQIC               </th>  <td>1162.980</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th></th>                 <td>- 11-11-2014</td>    <th>                     </th>      <td> </td>    \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "             <td></td>                <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>                   <td> 4.583e+04</td> <td> 7428.875</td> <td>    6.169</td> <td> 0.000</td> <td> 3.13e+04</td> <td> 6.04e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>ma.L1.D.CWXT_DB:184:C:\\</th> <td>   -0.2680</td> <td>    0.166</td> <td>   -1.610</td> <td> 0.116</td> <td>   -0.594</td> <td>    0.058</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>ma.L2.D.CWXT_DB:184:C:\\</th> <td>   -0.3221</td> <td>    0.141</td> <td>   -2.278</td> <td> 0.029</td> <td>   -0.599</td> <td>   -0.045</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>ma.L3.D.CWXT_DB:184:C:\\</th> <td>   -0.4055</td> <td>    0.128</td> <td>   -3.177</td> <td> 0.003</td> <td>   -0.656</td> <td>   -0.155</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<caption>Roots</caption>\n",
       "<tr>\n",
       "    <td></td>   <th>           Real</th> <th>         Imaginary</th> <th>         Modulus</th> <th>        Frequency</th>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>MA.1</th>               1.0021                    -0.0000j                    1.0021                   -0.0000     \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>MA.2</th>              -0.8982                    -1.2862j                    1.5688                   -0.3470     \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>MA.3</th>              -0.8982                    +1.2862j                    1.5688                    0.3470     \n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                             ARIMA Model Results                              \n",
       "==============================================================================\n",
       "Dep. Variable:      D.CWXT_DB:184:C:\\   No. Observations:                   41\n",
       "Model:                 ARIMA(0, 1, 3)   Log Likelihood                -574.930\n",
       "Method:                       css-mle   S.D. of innovations         287517.428\n",
       "Date:                Tue, 14 Nov 2017   AIC                           1159.860\n",
       "Time:                        21:19:27   BIC                           1168.427\n",
       "Sample:                    10-02-2014   HQIC                          1162.980\n",
       "                         - 11-11-2014                                         \n",
       "===========================================================================================\n",
       "                              coef    std err          z      P>|z|      [0.025      0.975]\n",
       "-------------------------------------------------------------------------------------------\n",
       "const                    4.583e+04   7428.875      6.169      0.000    3.13e+04    6.04e+04\n",
       "ma.L1.D.CWXT_DB:184:C:\\    -0.2680      0.166     -1.610      0.116      -0.594       0.058\n",
       "ma.L2.D.CWXT_DB:184:C:\\    -0.3221      0.141     -2.278      0.029      -0.599      -0.045\n",
       "ma.L3.D.CWXT_DB:184:C:\\    -0.4055      0.128     -3.177      0.003      -0.656      -0.155\n",
       "                                    Roots                                    \n",
       "=============================================================================\n",
       "                 Real           Imaginary           Modulus         Frequency\n",
       "-----------------------------------------------------------------------------\n",
       "MA.1            1.0021           -0.0000j            1.0021           -0.0000\n",
       "MA.2           -0.8982           -1.2862j            1.5688           -0.3470\n",
       "MA.3           -0.8982           +1.2862j            1.5688            0.3470\n",
       "-----------------------------------------------------------------------------\n",
       "\"\"\""
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arima.summary() # 注意当p,q值为0，0时，summary方法报错"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 35779972.17486547,  35836938.19020772,  35889600.65376466,\n",
       "        35935428.1773845 ,  35981255.70100433])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forecast_values, forecasts_standard_error, forecast_confidence_interval = arima.forecast(5)\n",
    "forecast_values\n",
    "# arimaf = ARIMA(xdata2, (0,1,1)).fit()\n",
    "# arimaf.forecast(5)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "DatetimeIndex: 5 entries, 2014-11-12 to 2014-11-16\n",
      "Data columns (total 2 columns):\n",
      "实际值    5 non-null float64\n",
      "预测值    5 non-null float64\n",
      "dtypes: float64(2)\n",
      "memory usage: 120.0 bytes\n"
     ]
    }
   ],
   "source": [
    "predictdata = pd.DataFrame(xtest_value)\n",
    "predictdata.insert(1,'CWXT_DB:184:C:\\\\_predict',forecast_values)\n",
    "predictdata.rename(columns={'CWXT_DB:184:C:\\\\':u'实际值','CWXT_DB:184:C:\\_predict':u'预测值'},inplace=True)\n",
    "predictdata.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>实际值</th>\n",
       "      <th>预测值</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLLECTTIME</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2014-11-12</th>\n",
       "      <td>35704312.58</td>\n",
       "      <td>35779972.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-11-13</th>\n",
       "      <td>35704980.73</td>\n",
       "      <td>35836938.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-11-14</th>\n",
       "      <td>34570385.45</td>\n",
       "      <td>35889600.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-11-15</th>\n",
       "      <td>34673820.69</td>\n",
       "      <td>35935428.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-11-16</th>\n",
       "      <td>34793245.31</td>\n",
       "      <td>35981255.70</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     实际值          预测值\n",
       "COLLECTTIME                          \n",
       "2014-11-12   35704312.58  35779972.17\n",
       "2014-11-13   35704980.73  35836938.19\n",
       "2014-11-14   34570385.45  35889600.65\n",
       "2014-11-15   34673820.69  35935428.18\n",
       "2014-11-16   34793245.31  35981255.70"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_d = predictdata.applymap(lambda x: '%.2f' % x) # 将表格中各个浮点值都格式化\n",
    "result_d.to_excel('pedictdata_C.xlsx')\n",
    "result_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                   实际值        预测值\n",
      "COLLECTTIME                      \n",
      "2014-11-12   35.704313  35.779972\n",
      "2014-11-13   35.704981  35.836938\n",
      "2014-11-14   34.570385  35.889601\n",
      "2014-11-15   34.673821  35.935428\n",
      "2014-11-16   34.793245  35.981256\n",
      "0.795290026\n",
      "0.976369605661\n",
      "0.0229009946085\n",
      "误差阈值为1.5\n",
      "AIC模型下平均绝对误差为：0.7953, \n",
      "均方根误差为：0.9764, \n",
      "平均绝对百分误差为：0.0229\n",
      "误差检验通过！\n"
     ]
    }
   ],
   "source": [
    "# 第 * 5 * 步--D盘---------模型评价\n",
    "# 为了评价时序预测模型效果的好坏，本章采用3个衡量模型预测精度的统计量指标：平均绝对误差、均方根误差、平均绝对百分误差\n",
    "# -*- coding:utf-8 -*-\n",
    "import pandas as pd\n",
    "\n",
    "inputfile4 = 'pedictdata_C.xlsx'\n",
    "result = pd.read_excel(inputfile4,index_col='COLLECTTIME')\n",
    "result = result.applymap(lambda x: x/10**6)\n",
    "print result\n",
    "\n",
    "# 计算误差\n",
    "abs_ = (result[u'预测值']-result[u'实际值']).abs()\n",
    "mae_ = abs_.mean() # mae平均绝对误差\n",
    "rmas_ = ((abs_**2).mean())**0.5 #rmas均方根误差\n",
    "mape_ = (abs_/result[u'实际值']).mean() #mape平均绝对百分误差\n",
    "print mae_\n",
    "print rmas_\n",
    "print mape_\n",
    "errors = 1.5\n",
    "print '误差阈值为%s' % errors\n",
    "if (mae_ < errors) & (rmas_ < errors) & (mape_ < errors):\n",
    "    print (u'AIC模型下平均绝对误差为：%.4f, \\n均方根误差为：%.4f, \\n平均绝对百分误差为：%.4f' % (mae_, rmas_, mape_))\n",
    "    print '误差检验通过！'\n",
    "else:\n",
    "    print '误差检验不通过！'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 注意：\n",
    "# 说明：由于用HQ训练模型时，都是空值，所以，本例使用HQ不合适\n",
    "#-----ARIMA--BIC---\n",
    "#                    实际值        预测值\n",
    "# COLLECTTIME                      \n",
    "# 2014-11-12   35.704313  35.722538\n",
    "# 2014-11-13   35.704981  35.757104\n",
    "# 2014-11-14   34.570385  35.791669\n",
    "# 2014-11-15   34.673821  35.826235\n",
    "# 2014-11-16   34.793245  35.860800\n",
    "# 0.70232013\n",
    "# 0.890203752645\n",
    "# 0.0202432790493\n",
    "# 误差阈值为1.5\n",
    "# BIC模型下平均绝对误差为：0.7023, \n",
    "# 均方根误差为：0.8902, \n",
    "# 平均绝对百分误差为：0.0202\n",
    "# 误差检验通过！\n",
    "#-----ARIMA--AIC---\n",
    "#                    实际值        预测值\n",
    "# COLLECTTIME                      \n",
    "# 2014-11-12   35.704313  35.779972\n",
    "# 2014-11-13   35.704981  35.836938\n",
    "# 2014-11-14   34.570385  35.889601\n",
    "# 2014-11-15   34.673821  35.935428\n",
    "# 2014-11-16   34.793245  35.981256\n",
    "# 0.795290026\n",
    "# 0.976369605661\n",
    "# 0.0229009946085\n",
    "# AIC模型下平均绝对误差为：0.7953, \n",
    "# 均方根误差为：0.9764, \n",
    "# 平均绝对百分误差为：0.0229\n",
    "# 误差检验通过！\n",
    "# 通过对比AIC与BIC的结果，可以发现BIC的几个误差均较小\n",
    "#-----ARMA--BIC---\n",
    "#                    实际值        预测值\n",
    "# COLLECTTIME                      \n",
    "# 2014-11-12   35.704313  35.581706\n",
    "# 2014-11-13   35.704981  35.488223\n",
    "# 2014-11-14   34.570385  35.405986\n",
    "# 2014-11-15   34.673821  35.333641\n",
    "# 2014-11-16   34.793245  35.270000\n",
    "# 0.462308002\n",
    "# 0.533460826783\n",
    "# 0.0132815193493\n",
    "# 误差阈值为1.5\n",
    "# 平均绝对误差为：0.4623, \n",
    "# 均方根误差为：0.5335, \n",
    "# 平均绝对百分误差为：0.0133\n",
    "# 误差检验通过！\n",
    "\n",
    "# 综上：ARMA+BIC更优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
